import pandas as pd
import numpy as np
import matplotlib as plt
import json, re
import logging

# Configure logging once at import time: INFO level, with each record showing
# the timestamp, logger name, and severity ahead of the message.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Used to analyse the training data.
'''
count    32825.000000
mean        72.680640
std         33.762943
min          2.000000
25%         54.000000
50%         67.000000
75%         85.000000
max        306.000000
Name: length_input_one, dtype: float64
'''

def main(csv_path='cls_12th_train.csv'):
    """Print piece-count statistics for the ``input_one`` column of a CSV.

    Every value of ``input_one`` is split on a single space and the number of
    resulting pieces is stored in a new ``length_input_one`` column. The
    function then prints, in order:

    1. the total number of pieces over all rows,
    2. the mean number of pieces per row,
    3. ``describe()`` of the ``length_input_one`` column.

    When the file has no data rows, only the total (``0``) and a short notice
    are printed; the mean is skipped instead of dividing by zero.

    Args:
        csv_path: Path of the CSV file to analyse. Defaults to the file name
            the script was originally hard-wired to.

    Returns:
        None. All results are written to standard output.

    Raises:
        KeyError: If the CSV has no ``input_one`` column.
    """
    df = pd.read_csv(csv_path)

    # Split on a literal ' ' (not on arbitrary whitespace) so the counts stay
    # identical to the ones this script has always reported.
    # NOTE(review): values are assumed to be strings; a missing cell (NaN)
    # has no ``.split`` and would raise AttributeError here.
    piece_counts = [len(text.split(' ')) for text in df['input_one']]
    df['length_input_one'] = piece_counts

    # The counts are computed once above and reused for the total.
    total_pieces = sum(piece_counts)
    print(total_pieces)

    if not piece_counts:
        # No rows: a mean is undefined, so report that instead of crashing.
        print('No rows found in input_one; skipping mean and describe().')
        return

    print(total_pieces / len(piece_counts))
    print(df['length_input_one'].describe())


# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
